- Students receive and act on Ch.13,14 feedback.
- Discuss a modern, simulation-based workflow for Bayesian time series modeling.
- Students apply computational methods to real world data (Final Exam/Poster).
4 May 2021 (updated: 2021-04-20)
Please contribute questions and comments
By the end of this lecture, I hope that you’ll be able to…
We will use stan and R, via the rethinking R package [@McElreath2020] that accompanies the textbook Statistical Rethinking. rethinking is a very convenient suite of functions for Bayesian inference that uses, and works well with, the Hamiltonian Monte Carlo–based software stan. rethinking lets you specify a model in essentially the same way you would write it down in prose.\[ P(\theta | data) = \frac{P(data | \theta) P(\theta)}{\int_\Omega P(data | \theta) P(\theta) d\theta} \]
\[\pi(\theta \vert data) \propto \pi(\theta) \times \pi(data \vert \theta)\].
\[ \begin{split} y_i &\sim \mathcal{N}( \alpha + \beta x_i , \sigma ) \\ \alpha &\sim p(\alpha) \\ \beta &\sim p(\beta) \\ \sigma &\sim p(\sigma) \\ \end{split} \]
\[ \pi(\theta | data) = \frac{\pi(data | \theta) \pi(\theta)}{\int_\Omega \pi(data | \theta) \pi(\theta) d\theta} \]
\[ \mathrm{lppd}(y, \Theta) = \sum_i \log \frac{1}{S} \sum_s p(y_i | \Theta_s) \]
\[ \mathrm{lppd}_{CV} = \sum_i \frac{1}{S} \sum_s \log p(y_{i} | \theta_{-i,s}) \]
cherry_blossoms data set: People in Japan have been recording cherry blossom timings and climate data for over 1000 years, from 801–2015, in our data set below.
library(rethinking)
## Loading required package: rstan
## Loading required package: StanHeaders
## rstan (Version 2.21.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling ## options(mc.cores = parallel::detectCores()). ## To avoid recompilation of unchanged Stan programs, we recommend calling ## rstan_options(auto_write = TRUE)
## ## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr': ## ## extract
## Loading required package: parallel
## rethinking (Version 2.11)
## ## Attaching package: 'rethinking'
## The following object is masked from 'package:purrr': ## ## map
## The following object is masked from 'package:stats': ## ## rstudent
data(cherry_blossoms) d <- cherry_blossoms precis(d)
|
|
|
|
|
d2 <- d[ complete.cases(d$doy) , ] # complete cases on doy n <- nrow(d2) d2$doy1 <- c( NA, d2$doy[-n] ) # add a lag d2 <- d2[ -1, ] ## for model comparison later
doy: first day of blooms\[ \begin{split} y_i &\sim \mathcal{N}( \alpha + \beta*year_i , \sigma ) \\ \alpha &\sim N(100, 10) \\ \beta &\sim N(0, 0.1) \\ \sigma &\sim Exponential(1) \\ \end{split} \]
## m0: linear trend model, doy_i ~ Normal(a + b * year_i, sigma)
m0 <- ulam(
  alist(
    y ~ dnorm( mu, sigma ) ,
    mu <- a + b*year ,
    a ~ dnorm(100, 10) ,      # prior centered near observed mean bloom day
    b ~ dnorm(0, 0.1),        # tight prior: small trend per year
    sigma ~ dexp(1)
  ) , data = list( y=d2$doy, year=d2$year),
  log_lik = TRUE)             # TRUE (not T): keep pointwise log-lik for PSIS/WAIC
## prior predictive checks: draw regression lines implied by the priors alone
prior <- extract.prior( m0 )
plot( NULL, xlim = range(d2$year),
      ylim = c(50, 150), xlab = "Year",
      ylab = "Lines Sampled from the Prior Predictive" )
N <- 100
for (i in seq_len(N)) {                       # seq_len() is safe if N were 0
  curve( prior$a[i] + prior$b[i]*x ,          # [i]: prior$a/b are plain vectors
         from = min(d2$year), to = max(d2$year), add = TRUE,
         col = col.alpha( "black", 0.2 ) )
}
## precis( m0 ) post <- extract.samples( m0 ) mu <- link( m0 ) mu_PI <- apply( mu, 2, PI, 0.89 ) d3 <- data.frame( m0_mean = colMeans(mu), m0_lower = mu_PI[ 1, ], m0_upper = mu_PI[ 2, ] ) d3 <- cbind( d2, d3 )
mu <- link( m0 ) mu_PI <- apply( mu, 2, PI, 0.89 ) D_sim <- sim( m0, n = 500 ) D_PI <- apply( D_sim, 2, PI, 0.89 )
\[ \begin{split} y_i &\sim \mathcal{N}( \alpha + \beta * y_{i-1}, \sigma ) \\ \alpha &\sim N(100, 10) \\ \beta &\sim N(0, 10) \\ \sigma &\sim Exponential(1) \\ \end{split} \]
## m1: AR(1)-style model, doy_i ~ Normal(a + b * doy_{i-1}, sigma)
m1 <- ulam(
  alist(
    y ~ dnorm( mu, sigma ) ,
    mu <- a + b*doy1,
    a ~ dnorm(100, 10) ,
    b ~ dnorm(0, 10),         # very wide prior on the lag coefficient (revisited in m11)
    sigma ~ dexp(1)
  ) , data = list( y=d2$doy, doy1=d2$doy1),
  log_lik = TRUE)             # TRUE (not T): keep pointwise log-lik for PSIS/WAIC
## prior predictive checks: simulate mu from the priors for each lagged doy
prior <- extract.prior( m1 )
N <- 100
ySim <- sapply( 1:N, function(k) ( prior$a[k] + prior$b[k]*d2$doy1 ) )
plot( NULL, xlim = range(d2$year),
      ylim = range(ySim), xlab = "Year",
      ylab = "Lines Sampled from the Prior Predictive" )
for (i in seq_len(N)) {
  lines(x = d2$year, y = ySim[, i],
        col = col.alpha( "black", 0.2 ) )
}
Try more informative prior:
## m11: same likelihood as m1 but with more informative priors
m11 <- ulam(
  alist(
    y ~ dnorm( mu, sigma ) ,
    mu <- a + b*doy1,
    a ~ dnorm(100, 5) ,       # tighter than m1's N(100, 10)
    b ~ dnorm(0, 0.5),        # tighter than m1's N(0, 10)
    sigma ~ dexp(1)
  ) , data = list( y=d2$doy, doy1=d2$doy1),
  log_lik = TRUE)             # TRUE (not T): keep pointwise log-lik for PSIS/WAIC
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable. ## Running the chains for more iterations may help. See ## http://mc-stan.org/misc/warnings.html#tail-ess
## prior predictive checks for the informative priors
prior <- extract.prior( m11 )
## visualize
N <- 100
ySim <- sapply( 1:N, function(k) ( prior$a[k] + prior$b[k]*d2$doy1 ) )
plot( NULL, xlim = range(d2$year),
      ylim = range(ySim), xlab = "Year",
      ylab = "Lines Sampled from the Prior Predictive" )
for (i in seq_len(N)) {
  lines(x = d2$year, y = ySim[, i],
        col = col.alpha( "black", 0.2 ) )
}
precis( m11)
|
|
|
|
|
|
post <- extract.samples( m11 ) mu <- link( m11 ) mu_PI <- apply( mu, 2, PI, 0.89 ) d4 <- data.frame( m1_mean = colMeans(mu), m1_lower = mu_PI[ 1, ], m1_upper = mu_PI[ 2, ] ) d4 <- cbind( d3, d4 )
mu <- link( m11 ) mu_PI <- apply( mu, 2, PI, 0.89 ) D_sim <- sim( m11, n = 500 ) D_PI <- apply( D_sim, 2, PI, 0.89 )
The likelihood:
\[ \begin{split} D_i &\sim \mathcal{N}( \mu_i , \sigma ) \\ \mu_i &= \alpha + \sum_{k=1}^K w_k B_{k,i}\\ \end{split} \]
With priors:
\[ \begin{split} \alpha &\sim N(100, 10) \\ w_j &\sim N(0, 10) \\ \sigma &\sim Exponential(1) \\ \end{split} \]
num_knots <- 15
# Place knots at evenly spaced quantiles of year
knot_list <- quantile( d4[ , "year"], probs = seq( 0, 1, length.out = num_knots ) )
library(splines)
# Cubic B-spline basis; interior knots only — bs() supplies the boundary knots
B <- bs( d4[ , "year"],
         knots = knot_list[ -c(1, num_knots) ],
         degree = 3, intercept = TRUE )
# Plot each basis function over year
plot( NULL, xlim = range(d4[ , "year"]), ylim = c(0, 1),
      xlab = "year", ylab = "basis" )
for (i in seq_len(ncol(B))) lines( d4[ , "year"], B[ , i] )
## m2: spline model D_i ~ Normal(a + sum_k w_k * B[k,i], sigma), fit with
## quadratic approximation (quap) rather than HMC. The alist() formulas use
## rethinking's non-standard evaluation, so the spec is left exactly as written.
m2 <- quap(
alist(
D ~ dnorm( mu, sigma ) ,
# mu is the intercept plus the basis matrix times the spline weights
mu <- a + B %*% w,
a ~ dnorm(100, 10) ,
w ~ dnorm(0, 10),
sigma ~ dexp(1)
# start: one initial value per spline weight (ncol(B) of them)
) , data = list( D=d4$doy, B = B ) ,
start = list( w = rep( 0, ncol(B) ) ) )
precis( m2 )
## 17 vector or matrix parameters hidden. Use depth=2 to show them.
|
|
|
|
post <- extract.samples( m2 ) mu <- link( m2 ) mu_PI <- apply( mu, 2, PI, 0.89 ) d5 <- data.frame( m2_mean = colMeans(mu), m2_lower = mu_PI[ 1, ], m2_upper = mu_PI[ 2, ] ) d5 <- cbind( d4, d5 )
mu <- link( m2 ) mu_PI <- apply( mu, 2, PI, 0.89 ) D_sim <- sim( m2, n = 1e4 ) D_PI <- apply( D_sim, 2, PI, 0.89 )
LOO: Rather than fitting n models to estimate the loo-cv error, you can use a Pareto Smoothed Importance Sampling estimate [@Vehtari2016a].
compare( m0, m1, m11, m2, func=PSIS )
## Warning in compare(m0, m1, m11, m2, func = PSIS): Not all model fits of same class. ## This is usually a bad idea, because it implies they were fit by different algorithms. ## Check yourself, before you wreck yourself.
|
|
|
|
|
|
I’m glad to answer any questions while I display the references.